/*
 * Copyright (c) 2025 Huawei Technologies Co., Ltd.
 * This file is a part of the CANN Open Software.
 * Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
 * Please refer to the License for details. You may not use this file except in compliance with the License.
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
 * INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
 * See LICENSE in the root of the software repository for the full text of the License.
 */

#include "matmul_async_custom_tiling.h"
#include "register/op_def_registry.h"
#include "tiling/platform/platform_ascendc.h"
#include "../testcase_params.h"

// Ratio between the two core kinds in mixed mode; TilingFunc divides the
// tiling's used-core count by this value to obtain the block dim it reports.
constexpr int32_t MIX_RATIO = 2;  // AIC:AIV = 1:2
namespace optiling {
/**
 * Host-side tiling entry for MatmulAsyncCustom.
 *
 * Chooses the C-matrix position from the configured compute mode, computes the
 * multi-core matmul tiling, and publishes block dim, tiling key, serialized
 * tiling data and workspace size through the tiling context.
 *
 * @param context Tiling context supplied by the framework.
 * @return ge::GRAPH_SUCCESS on success; ge::GRAPH_FAILED when the context or one
 *         of the objects it hands out is null, when an input has fewer than two
 *         dimensions or a negative dimension, or when ComputeTiling fails.
 */
static ge::graphStatus TilingFunc(gert::TilingContext* context)
{
    if (context == nullptr) {
        return ge::GRAPH_FAILED;
    }

    // IterateAll (and any other mode) writes C to GM; Iterate only supports
    // CTYPE::TPosition == VECIN.
    matmul_tiling::TPosition cPos = matmul_tiling::TPosition::GM;
    if (TESTCASE_PARAMS.mode == ComputeMode::ITERATE) {
        cPos = matmul_tiling::TPosition::VECIN;
    }

    auto ascendcPlatform = platform_ascendc::PlatformAscendC(context->GetPlatformInfo());
    auto* platformManager = platform_ascendc::PlatformAscendCManager::GetInstance();
    if (platformManager == nullptr) {
        return ge::GRAPH_FAILED;
    }
    auto aivCoreNum = platformManager->GetCoreNum();

    const auto* inputA = context->GetInputShape(0);
    const auto* inputB = context->GetInputShape(1);
    if (inputA == nullptr || inputB == nullptr) {
        return ge::GRAPH_FAILED;
    }
    // Bind by reference: the shapes are only read, no copy is needed.
    const auto& shapeA = inputA->GetStorageShape();
    const auto& shapeB = inputB->GetStorageShape();
    constexpr size_t MATRIX_DIM_NUM = 2;
    if (shapeA.GetDimNum() < MATRIX_DIM_NUM || shapeB.GetDimNum() < MATRIX_DIM_NUM) {
        return ge::GRAPH_FAILED;
    }
    // Keep the full 64-bit dimension values; narrowing them to int32_t would
    // silently corrupt the workspace size for large shapes.
    const int64_t m = shapeA.GetDim(0);
    const int64_t n = shapeB.GetDim(1);
    if (m < 0 || n < 0) {
        return ge::GRAPH_FAILED;
    }

    // NOTE(review): the tiling is computed from TESTCASE_PARAMS.m/n/k while the
    // workspace below is sized from the runtime input shapes -- confirm that the
    // two are always expected to agree.
    matmul_tiling::MultiCoreMatmulTiling cubeTiling(ascendcPlatform);
    MatmulAsyncCustomTilingData tiling;
    bool res = ComputeTiling(tiling.cubeTilingData, &cubeTiling, TESTCASE_PARAMS.m, TESTCASE_PARAMS.n, TESTCASE_PARAMS.k,
        aivCoreNum, TESTCASE_PARAMS.hasBias, cPos);
    if (!res) {
        return ge::GRAPH_FAILED;
    }

    // NOTE(review): integer division yields 0 when fewer than MIX_RATIO cores
    // are used -- confirm that case cannot occur.
    uint32_t blockDim = cubeTiling.tiling_.get_usedCoreNum() / MIX_RATIO;
    context->SetBlockDim(blockDim);
    context->SetTilingKey(1);

    auto* rawTilingData = context->GetRawTilingData();
    if (rawTilingData == nullptr) {
        return ge::GRAPH_FAILED;
    }
    tiling.SaveToBuffer(rawTilingData->GetData(), rawTilingData->GetCapacity());
    rawTilingData->SetDataSize(tiling.GetDataSize());

    size_t* currentWorkspace = context->GetWorkspaceSizes(1);
    if (currentWorkspace == nullptr) {
        return ge::GRAPH_FAILED;
    }
    // Async workspace holds M * N floats; widen before multiplying so the
    // product is computed in size_t rather than in a 32-bit signed type.
    const size_t userWorkspaceSize = static_cast<size_t>(m) * static_cast<size_t>(n) * sizeof(float);
    const size_t systemWorkspaceSize = static_cast<size_t>(ascendcPlatform.GetLibApiWorkSpaceSize());
    currentWorkspace[0] = userWorkspaceSize + systemWorkspaceSize;

    return ge::GRAPH_SUCCESS;
}
}

namespace ops {
class MatmulAsyncCustom : public OpDef {
public:
    explicit MatmulAsyncCustom(const char* name) : OpDef(name)
    {
        this->Input("a")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT16})
            .Format({ge::FORMAT_ND})
            .UnknownShapeFormat({ge::FORMAT_ND});
        this->Input("b")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT16})
            .Format({ge::FORMAT_ND})
            .UnknownShapeFormat({ge::FORMAT_ND});
        this->Input("bias")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT})
            .Format({ge::FORMAT_ND})
            .UnknownShapeFormat({ge::FORMAT_ND});
        this->Output("c")
            .ParamType(REQUIRED)
            .DataType({ge::DT_FLOAT})
            .Format({ge::FORMAT_ND})
            .UnknownShapeFormat({ge::FORMAT_ND});

        this->AICore()
            .SetTiling(optiling::TilingFunc);
        this->AICore().AddConfig("ascend910b");
    }
};

OP_ADD(MatmulAsyncCustom);
}